** Data reading and variable selection from raw data
** Japanese General Social Survey 2002


** 01. Reading data **

* Close any log left open by an earlier run (no error if none is open),
* start from an empty session, and switch off output paging.
capture log close
clear all
set more off

* Fill in the two placeholders below before running the file.
cd /*insert your work directory here*/
use /*read your data here*/

* Prefix every existing value label with its numeric code.
numlabel, add


** 02. Constructing year and country variables **

* Survey year: a constant for every observation in this wave.
generate year = 2002
label variable year "survey year"

* Country code: Japan is 392 (see "ISO Country Codes.pdf").
generate country = 392
label variable country "ISO country code"


** 03. ID variables **

* Person identifier, copied unchanged from the raw variable IDUSE.
generate pid = IDUSE
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex: copied from SEXA and labelled 1 = male, 2 = female.
generate sex = SEXA
label variable sex "sex"
label define sex 1 "male" 2 "female"
label values sex sex

* Age: copied from AGEB.
generate age = AGEB
label variable age "age"

* Year of birth: derived as survey year minus age.
generate birthyr = year - age
label variable birthyr "year of birth"


** 05. Siblings **

* Brothers and sisters are each the sum of two raw counts
* (presumably E = elder and Y = younger -- confirm against the codebook).
generate nbro = XNUMBROE + XNUMBROY
generate nsis = XNUMSISE + XNUMSISY

* Total number of siblings.
generate nsibs = nbro + nsis

* Birth order: the two "E" counts plus one for the respondent.
generate birthorder = XNUMBROE + XNUMSISE + 1

label variable nbro "number of brothers"
label variable nsis "number of sisters"
label variable nsibs "number of siblings"
label variable birthorder "birth order"

* Missing: the values labelled below (1998, 3996, 1999) are the sums that
* arise when every raw component carries its missing code.
* NOTE(review): these look like sums of a raw code 999 -- confirm. If only
* one component is missing, the sum is some other large value (e.g. 999 + 2)
* that is NOT labelled here; check the raw data for such cases.
label define nbro 1998 "missing"
label values nbro nbro

label define nsis 1998 "missing"
label values nsis nsis

* The value label keeps its original name (nsib); the variable is written
* out in full (nsibs) so that this line does not depend on variable-name
* abbreviation and still runs under -set varabbrev off-.
label define nsib 3996 "missing"
label values nsibs nsib

label define birthorder 1999 "missing"
label values birthorder birthorder


** 06. Own education **

* Last school attended (raw XXLSTSCH).
generate educ_a = XXLSTSCH
label variable educ_a "last school attended"

label define educ_a ///
    1 "ordinary elementary_old system" ///
    2 "higher elementary_old system" ///
    3 "junior high/girls high_old system" ///
    4 "vocational_old system" ///
    5 "normal_old system" ///
    6 "higher or vocational_old system/higher normal" ///
    7 "university/graduate_old system" ///
    8 "junior high" ///
    9 "high" ///
    10 "2-year college/College of technology" ///
    11 "university" ///
    12 "graduate school" ///
    13 "don't know" ///
    99 "no answer"

label values educ_a educ_a

* Whether the last school was completed (raw DOLSTSCH).
generate educ_c = DOLSTSCH
label variable educ_c "completion of the last school"
label define educ_c 1 "graduated" 2 "quit" 3 "still a student" 9 "no answer"
label values educ_c educ_c

* Highest education level.
* Graduates and current students keep the level of the last school attended.
generate educ = educ_a if educ_c == 1 | educ_c == 3

* Respondents who quit are moved one category down ...
replace educ = educ_a - 1 if educ_c == 2 & educ_a < 13

* ... except where "one code down" is not the preceding level. The codes are
* not ordinal across the two school systems: 1-7 are old-system schools and
* 8-12 are new-system schools. A plain decrement would turn a junior-high
* dropout (8) into 7 "university/graduate_old system", and a university
* dropout (11) into 10 "2-year college", which they never completed.
*   - quit junior high -> 1, the only available elementary-level category
*   - quit university  -> 9, high school
* NOTE(review): the decrements inside the old system (codes 2-7) are left
* as they were; confirm they reflect the intended school sequence.
replace educ = 1 if educ_c == 2 & educ_a == 8
replace educ = 9 if educ_c == 2 & educ_a == 11

* Quit, with last school unknown or unanswered: code as "don't know".
* Stata treats missing as larger than any number, so missing educ_a is
* excluded explicitly and educ stays missing in that case.
replace educ = 13 if educ_c == 2 & educ_a >= 13 & !missing(educ_a)

* No answer on completion status.
replace educ = 99 if educ_c == 9

label define educ ///
    0 "no completed formal education" ///
    1 "ordinary elementary_old system" ///
    2 "higher elementary_old system" ///
    3 "junior high/girls high_old system" ///
    4 "vocational_old system" ///
    5 "normal_old system" ///
    6 "higher or vocational_old system/higher normal" ///
    7 "university/graduate_old system" ///
    8 "junior high" ///
    9 "high" ///
    10 "2-year college/College of technology" ///
    11 "university" ///
    12 "graduate school" ///
    13 "don't know" ///
    99 "no answer"

label values educ educ


** 07. Parents' education: Father and/or Mother **

* Last school of father (PPLSTSCH) and mother (MMLSTSCH).
generate faeduc = PPLSTSCH
generate moeduc = MMLSTSCH
label variable faeduc "father's education"
label variable moeduc "mother's education"

* Both variables use the same category scheme. Each gets its own value
* label, named after the variable.
foreach parent in faeduc moeduc {
    label define `parent' ///
        1 "ordinary elementary_old system" ///
        2 "higher elementary_old system" ///
        3 "junior high/girls high_old system" ///
        4 "vocational_old system" ///
        5 "normal_old system" ///
        6 "higher or vocational_old system/higher normal" ///
        7 "university/graduate_old system" ///
        8 "junior high" ///
        9 "high" ///
        10 "2-year college/College of technology" ///
        11 "university" ///
        12 "graduate school" ///
        13 "don't know" ///
        99 "no answer"
    label values `parent' `parent'
}

** 08. Own occupation **

* Occupation codes for the first, last and current job.
generate firstocc = XXFSTJB
label variable firstocc "first occupation"
generate lastocc = XXLSTJB
label variable lastocc "last occupation"
generate occ = XXJOB
label variable occ "current occupation"

* One value label (occ) names the special codes for all three variables.
label define occ 986 "no occupation" 998 "not applicable" 999 "no answer"
foreach job in firstocc lastocc occ {
    label values `job' occ
}

* Employment status for the first, last and current job.
generate firstempstat = TP12FSTJ
label variable firstempstat "employment status of first occupation"
generate lastempstat = TP12LSTJ
label variable lastempstat "employment status of last occupation"
generate empstat = TP12JOB
label variable empstat "employment status of current occupation"

* One value label (empstat) is shared by all three status variables.
label define empstat ///
    1 "Executive of a company or a corporation" ///
    2 "Regular employee with no managerial post temporary worker" ///
    3 "Regular employee - Group leader, Foreman" ///
    4 "Regular employee - Sub-section Head agency or equivalent - kakaricho" ///
    5 "Regular employee - Section Head, Manager or equivalent - kacho" ///
    6 "Regular employee - Department Head, General Manager or equivalent - bucho" ///
    7 "Regular employee - managerial status unknown" ///
    8 "Temporary worker, Daily worker, Part-time" ///
    9 "Dispatched worker from temporary personnel" ///
    10 "Self-employed" ///
    11 "Family worker" ///
    12 "Doing piecework at home" ///
    13 "Don't know" ///
    88 "Not applicable" ///
    99 "No answer"

foreach status in firstempstat lastempstat empstat {
    label values `status' empstat
}


** 09. Parents' occupation **

* Father's occupation code when the respondent was 15 (raw PPJBXX15).
generate faocc_y = PPJBXX15
label variable faocc_y "father's occupation when respondent's 15"
label define faocc_y 986 "no occupation" 998 "not applicable" 999 "no answer"
label values faocc_y faocc_y

* Father's employment status when the respondent was 15 (raw PPJBTP15).
generate faempstat_y = PPJBTP15
label variable faempstat_y "father's employment status when respondent's 15"

label define faempstat_y ///
    1 "Executive of a company or a corporation" ///
    2 "Regular employee with no managerial post temporary worker" ///
    3 "Regular employee - Group leader, Foreman" ///
    4 "Regular employee - Sub-section Head agency or equivalent - kakaricho" ///
    5 "Regular employee - Section Head, Manager or equivalent - kacho" ///
    6 "Regular employee - Department Head, General Manager or equivalent - bucho" ///
    7 "Regular employee - managerial status unknown" ///
    8 "Temporary worker, Daily worker, Part-time" ///
    9 "Dispatched worker from temporary personnel" ///
    10 "Self-employed" ///
    11 "Family worker" ///
    12 "Doing piecework at home" ///
    13 "I didnt have a father at the time" ///
    14 "Don't know" ///
    88 "Not applicable" ///
    99 "No answer"

label values faempstat_y faempstat_y

* Mother's employment status when the respondent was 15 (raw MMJBTP15).
* It has its own category scheme, different from the father's.
generate moempstat_y = MMJBTP15
label variable moempstat_y "mother's employment status when respondent's 15"

label define moempstat_y ///
    1 "She was not working" ///
    2 "Temporary worker, Daily worker, Part-time worker" ///
    3 "Regular employee - non-management position" ///
    4 "Regular employee - managerial position" ///
    5 "Regular employee - professional like nurse & teacher" ///
    6 "Regular employee - dont know about occupation" ///
    7 "self-employed/family worker - agricultural" ///
    8 "self-employed/family worker - other" ///
    9 "doing piecework at home" ///
    10 "executive of a company or a corporation" ///
    11 "I didnt have a mother at the time" ///
    12 "dont know" ///
    99 "no answer"

label values moempstat_y moempstat_y


** 10. Tabulate the Identified Variables **

* Open a plain-text log (fill in the file name), overwriting any earlier
* copy. Everything up to -log close- is recorded in it.
log using /*insert your work directory here*/, replace text

** Data reading and variable selection from raw data
** Japanese General Social Survey 2002

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis birthorder, detail

** R's Own Education **
tab1 educ

** Parental Education **
tab1 faeduc moeduc

** R's Own Occupation **
tab1 occ empstat

** Parental Occupation **
tab1 faocc_y faempstat_y moempstat_y

log close

** 11. Keep the identified variables only

* The constructed variables are grouped by topic; everything else
* (including the raw variables and educ_a / educ_c) is dropped.
local design    year country pid
local demog     sex age birthyr
local siblings  nbro nsis nsibs birthorder
local schooling educ faeduc moeduc
local ownjob    firstocc firstempstat lastocc lastempstat occ empstat
local parentjob faocc_y faempstat_y moempstat_y

keep `design' `demog' `siblings' `schooling' `ownjob' `parentjob'


** 12. Save the Data File **

* Fill in the output file name; an existing file is overwritten.
saveold /*insert your work directory here*/, replace

** 13. Homogenising education **
** Own Education **

* The categorical variable is kept under a new name.
rename educ educ_cat

* Years of schooling assigned to each category. Categories 13 (don't know)
* and 99 (no answer), and any value not listed, stay missing.
generate educ_yrs = .
replace educ_yrs = 0  if educ_cat == 0
replace educ_yrs = 6  if educ_cat == 1
replace educ_yrs = 8  if educ_cat == 2
replace educ_yrs = 9  if educ_cat == 8
replace educ_yrs = 10 if educ_cat == 3
replace educ_yrs = 11 if educ_cat == 4
replace educ_yrs = 12 if educ_cat == 9
replace educ_yrs = 14 if inlist(educ_cat, 5, 6, 10)
replace educ_yrs = 16 if inlist(educ_cat, 7, 11)
replace educ_yrs = 20 if educ_cat == 12
label variable educ_yrs "respondent highest education in years"

* ISCED code assigned to each category, stored as a number (so 020 is held
* as 20). Categories 13 and 99, and any value not listed, stay missing.
generate educ_ISCED = .
replace educ_ISCED = 020 if educ_cat == 0
replace educ_ISCED = 100 if inlist(educ_cat, 1, 2)
replace educ_ISCED = 244 if educ_cat == 8
replace educ_ISCED = 300 if inlist(educ_cat, 3, 4, 5)
replace educ_ISCED = 344 if educ_cat == 9
replace educ_ISCED = 500 if inlist(educ_cat, 6, 10)
replace educ_ISCED = 600 if educ_cat == 7
replace educ_ISCED = 665 if educ_cat == 11
replace educ_ISCED = 767 if educ_cat == 12
label variable educ_ISCED "respondent highest education in ISCED code"

** Parents Education **

* Flag = 1: the father's-education variable really refers to the father.
generate faeduc_flag = 1

* Categorical variables are kept under new names (mother: mo -> ma).
rename faeduc faeduc_cat
rename moeduc maeduc_cat

* Years of schooling, with the same category-to-years mapping for both
* parents. Categories 13 (don't know) and 99 (no answer), and any value not
* listed, stay missing.
foreach par in fa ma {
    generate `par'educ_yrs = .
    replace `par'educ_yrs = 0  if `par'educ_cat == 0
    replace `par'educ_yrs = 6  if `par'educ_cat == 1
    replace `par'educ_yrs = 8  if `par'educ_cat == 2
    replace `par'educ_yrs = 9  if `par'educ_cat == 8
    replace `par'educ_yrs = 10 if `par'educ_cat == 3
    replace `par'educ_yrs = 11 if `par'educ_cat == 4
    replace `par'educ_yrs = 12 if `par'educ_cat == 9
    replace `par'educ_yrs = 14 if inlist(`par'educ_cat, 5, 6, 10)
    replace `par'educ_yrs = 16 if inlist(`par'educ_cat, 7, 11)
    replace `par'educ_yrs = 20 if `par'educ_cat == 12
}
label variable faeduc_yrs "father's education in years"
label variable maeduc_yrs "mother's education in years"

* ISCED codes, again with one mapping for both parents. They are stored as
* numbers, so 020 is held as 20.
foreach par in fa ma {
    generate `par'educ_ISCED = .
    replace `par'educ_ISCED = 020 if `par'educ_cat == 0
    replace `par'educ_ISCED = 100 if inlist(`par'educ_cat, 1, 2)
    replace `par'educ_ISCED = 244 if `par'educ_cat == 8
    replace `par'educ_ISCED = 300 if inlist(`par'educ_cat, 3, 4, 5)
    replace `par'educ_ISCED = 344 if `par'educ_cat == 9
    replace `par'educ_ISCED = 500 if inlist(`par'educ_cat, 6, 10)
    replace `par'educ_ISCED = 600 if `par'educ_cat == 7
    replace `par'educ_ISCED = 665 if `par'educ_cat == 11
    replace `par'educ_ISCED = 767 if `par'educ_cat == 12
}
label variable faeduc_ISCED "father highest education in ISCED code"
label variable maeduc_ISCED "mother highest education in ISCED code"

** 14. Homogenising siblings **

* Cutoff flags: 99 means the sibling counts in this survey have no cutoff.
generate nbro_flag = 99
label variable nbro_flag "cutoff of number of brothers"
generate nsis_flag = 99
label variable nsis_flag "cutoff of number of sisters"
generate nsibs_flag = 99
label variable nsibs_flag "cutoff of total number of siblings"

label define nsib_flag 99 "no cutoff"
label values nbro_flag nsis_flag nsibs_flag nsib_flag

* Recode the "missing" sentinel values to system missing.
replace nbro = . if nbro == 1998
replace nsis = . if nsis == 1998
replace nsibs = . if nsibs == 3996
* birthorder carries a "missing" sentinel as well (1999, from its value
* label). Without this recode it stays in the saved data as a birth order
* of 1999.
replace birthorder = . if birthorder == 1999
* NOTE(review): only the all-components-missing sums are recoded here. If a
* single raw component can be missing on its own, other implausibly large
* values may remain -- check the distributions tabulated below.

** 15. Tab Education and Sibling Variables **

* One-way tables of the demographic, education and sibling variables.
* (The log opened in section 10 is already closed at this point.)
tab1 sex age birthyr
tab1 educ_cat educ_yrs ///
     faeduc_cat faeduc_yrs ///
     maeduc_cat maeduc_yrs ///
     faeduc_flag
tab1 nbro nsis nsibs ///
     nbro_flag nsis_flag nsibs_flag


** 16. Save the Data File **

* Fill in the output file name; an existing file is overwritten.
saveold /*insert your work directory here*/, replace

